图表说明 | Figure Legend:
A 面板 | Panel A: 展示了31个低氧相关基因在6个样品中的表达水平热图。颜色从蓝色(低表达)到红色(高表达)变化。样品按条件分组(正常氧气、低氧、高度低氧)。
B 面板 | Panel B: 散点图显示了每个基因与低氧评分的皮尔逊相关系数和对应的-log10 p值。横线代表0.05显著性水平。
C 面板 | Panel C: 按功能通路分组展示的基因表达情况,包括转录因子、血管生成、代谢重编程等关键通路。
D 面板 | Panel D: 基于表达相关性(>0.6)构建的基因调控网络。
# 设置随机种子以保证可重复性 | Set seed for reproducibility
set.seed(2025)
# 生成基因表达数据 | Generate gene expression data
genes <- c(
"HIF1A", "HIF2A", "HIF3A", "VEGFA", "VEGFB", "VEGFC",
"EPO", "BNIP3", "BNIP3L", "CAIX", "PHD1", "PHD2", "PHD3",
"PGK1", "LDHA", "SLC16A1", "GLUT1", "GLUT3", "ENO1",
"PFKFB3", "PFKFB4", "PKM2", "PDK1", "PDK3", "GBE1",
"NDRG1", "ANKRD37", "KDM4A", "LOX", "SERPINE1", "CTGF"
)
# 创建样品名称 | Create sample names (6个细胞系 | 6 cell lines)
samples <- paste0("Cell_", 1:6)
conditions <- c("Normoxia", "Normoxia", "Hypoxia", "Hypoxia", "Hypoxia_High", "Hypoxia_High")
# 生成表达矩阵 | Generate expression matrix
n_genes <- length(genes)
n_samples <- length(samples)
# 正常氧气条件下的基线表达 | Baseline expression under normoxia
baseline_expr <- rnorm(n_genes, mean = 5, sd = 1.5)
# 创建表达数据矩阵 | Create expression data matrix
expr_matrix <- matrix(0, nrow = n_genes, ncol = n_samples)
rownames(expr_matrix) <- genes
colnames(expr_matrix) <- samples
for (i in 1:n_genes) {
for (j in 1:n_samples) {
if (j <= 2) {
# 正常氧气条件 | Normoxia condition
expr_matrix[i, j] <- baseline_expr[i] + rnorm(1, mean = 0, sd = 0.5)
} else if (j <= 4) {
# 低氧条件 | Hypoxia condition (增加表达 | increased expression)
expr_matrix[i, j] <- baseline_expr[i] + rnorm(1, mean = 3, sd = 1)
} else {
# 高度低氧条件 | High hypoxia condition (进一步增加 | further increased)
expr_matrix[i, j] <- baseline_expr[i] + rnorm(1, mean = 5, sd = 1.2)
}
}
}
# 确保没有负值 | Ensure no negative values
expr_matrix[expr_matrix < 0] <- 0.1
# 计算低氧评分 | Calculate hypoxia scores
hypoxia_cores <- c(
"HIF1A", "VEGFA", "EPO", "CAIX", "PHD2", "LDHA", "GLUT1", "PGK1"
)
hypoxia_score <- colMeans(expr_matrix[rownames(expr_matrix) %in% hypoxia_cores, ])
# 创建元数据 | Create metadata
sample_info <- data.frame(
sample = samples,
condition = conditions,
hypoxia_score = hypoxia_score,
stringsAsFactors = FALSE
)
head(expr_matrix)
## Cell_1 Cell_2 Cell_3 Cell_4 Cell_5 Cell_6
## HIF1A 6.841070 5.295926 9.519951 8.296882 9.412157 11.271634
## HIF2A 4.312582 5.509402 9.024931 9.326589 11.808184 9.320163
## HIF3A 5.979455 5.395156 10.292995 8.718993 11.315257 12.205211
## VEGFA 7.161491 7.094180 8.680724 9.746536 13.214419 12.339775
## VEGFB 6.090059 5.870653 7.724884 7.864078 10.502715 11.269946
## VEGFC 5.075269 4.995429 9.128781 6.608034 6.814880 8.672643
head(sample_info)
## sample condition hypoxia_score
## Cell_1 Cell_1 Normoxia 5.736570
## Cell_2 Cell_2 Normoxia 5.370170
## Cell_3 Cell_3 Hypoxia 8.260937
## Cell_4 Cell_4 Hypoxia 8.438362
## Cell_5 Cell_5 Hypoxia_High 10.194875
## Cell_6 Cell_6 Hypoxia_High 10.564447
# 计算每个基因与低氧评分的相关系数 | Calculate correlation between genes and hypoxia scores
correlations <- apply(expr_matrix, 1, function(x) {
cor(x, sample_info$hypoxia_score)
})
# 计算p值 | Calculate p-values
p_values <- apply(expr_matrix, 1, function(x) {
cor.test(x, sample_info$hypoxia_score)$p.value
})
# 创建结果数据框 | Create results dataframe
gene_hypoxia_assoc <- data.frame(
gene = names(correlations),
correlation = correlations,
p_value = p_values,
log10_pval = -log10(p_values),
stringsAsFactors = FALSE
) %>%
arrange(desc(abs(correlation)))
head(gene_hypoxia_assoc, 10)
## gene correlation p_value log10_pval
## ANKRD37 ANKRD37 0.9889460 0.0001826115 3.738472
## PHD2 PHD2 0.9832064 0.0004206692 3.376059
## PDK3 PDK3 0.9793637 0.0006343938 3.197641
## LDHA LDHA 0.9779953 0.0007209834 3.142075
## HIF3A HIF3A 0.9765273 0.0008199829 3.086195
## KDM4A KDM4A 0.9725112 0.0011230647 2.949595
## VEGFB VEGFB 0.9696857 0.0013645035 2.865025
## PFKFB4 PFKFB4 0.9665635 0.0016583129 2.780334
## CTGF CTGF 0.9643244 0.0018864175 2.724362
## EPO EPO 0.9633960 0.0019852625 2.702182
# 标准化表达矩阵用于热图 | Normalize expression matrix for heatmap
expr_scaled <- t(scale(t(expr_matrix)))
# 准备绘图数据 | Prepare plotting data
heatmap_data <- expr_scaled %>%
as.data.frame() %>%
rownames_to_column("gene") %>%
pivot_longer(-gene, names_to = "sample", values_to = "expression")
# 添加条件信息 | Add condition information
heatmap_data <- heatmap_data %>%
left_join(sample_info, by = c("sample" = "sample"))
# 创建热图 | Create heatmap
p_heatmap <- ggplot(heatmap_data, aes(x = sample, y = gene, fill = expression)) +
geom_tile(color = "white", size = 0.5) +
scale_fill_gradient2(
low = "blue", mid = "white", high = "red",
name = "表达量\n(Expression)"
) +
facet_grid(. ~ condition, scales = "free_x", space = "free_x") +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
axis.text.y = element_text(size = 8),
legend.position = "bottom"
) +
labs(
title = "A. 基因表达热图 | Gene Expression Heatmap",
x = "样品 | Sample",
y = "基因 | Gene"
)
print(p_heatmap)
# 绘制基因与低氧评分的相关性 | Plot gene-hypoxia correlation
p_scatter <- ggplot(gene_hypoxia_assoc, aes(x = correlation, y = -log10(p_value))) +
geom_point(aes(color = correlation, size = abs(correlation)), alpha = 0.7) +
scale_color_gradient2(
low = "blue", mid = "gray", high = "red",
name = "相关系数\n(Correlation)"
) +
scale_size_continuous(name = "绝对值\n(|correlation|)") +
geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black", alpha = 0.5) +
geom_vline(xintercept = 0, linetype = "dashed", color = "black", alpha = 0.5) +
# 标注关键基因 | Annotate key genes
ggrepel::geom_text_repel(
data = gene_hypoxia_assoc %>% head(10),
aes(label = gene),
size = 3
) +
theme_minimal() +
labs(
title = "B. 基因与低氧评分的相关性 | Gene-Hypoxia Score Correlation",
x = "皮尔逊相关系数 | Pearson Correlation Coefficient",
y = "-log10(p值) | -log10(p-value)"
) +
theme(
legend.position = "right",
plot.title = element_text(hjust = 0.5, size = 12, face = "bold")
)
print(p_scatter)
# 定义低氧响应通路与基因 | Define hypoxia response pathways and genes
pathways <- list(
"转录因子" = c("HIF1A", "HIF2A", "HIF3A"),
"血管生成" = c("VEGFA", "VEGFB", "VEGFC", "ANGPT1"),
"造血" = c("EPO"),
"代谢重编程" = c("LDHA", "PKM2", "PFKFB3", "GLUT1", "PGK1"),
"缺氧标志" = c("CAIX", "BNIP3", "BNIP3L"),
"原癌基因激活" = c("MYC", "KRAS", "BRAF"),
"免疫检查点" = c("PD-L1", "PD-L2", "LAG3"),
"治疗靶点" = c("EGFR", "ALK", "ROS1", "BRAF", "KRAS")
)
# Transcription factors
# Angiogenesis
# Hematopoiesis
# Metabolic reprogramming
# Hypoxia markers
# Oncogene activation
# Immune checkpoints
# Therapeutic targets
# 创建通路数据 | Create pathway data
pathway_data <- data.frame(
gene = character(),
pathway = character(),
expression_mean = numeric(),
target_type = character(),
stringsAsFactors = FALSE
)
for (pathway in names(pathways)) {
genes_in_pathway <- pathways[[pathway]]
for (gene in genes_in_pathway) {
if (gene %in% rownames(expr_matrix)) {
expr_mean <- mean(expr_matrix[gene, 3:6]) # 低氧条件的平均表达
} else {
expr_mean <- runif(1, 3, 8) # 为不在矩阵中的基因生成随机值
}
target_type <- ifelse(
pathway %in% c("免疫检查点", "治疗靶点"),
"Drug Target",
"Biomarker"
)
# Drug Target | Biomarker
pathway_data <- rbind(pathway_data, data.frame(
gene = gene,
pathway = pathway,
expression_mean = expr_mean,
target_type = target_type,
stringsAsFactors = FALSE
))
}
}
# 按通路和表达量排序 | Sort by pathway and expression
pathway_data <- pathway_data %>%
arrange(pathway, desc(expression_mean))
# 绘制通路图 | Create pathway visualization
p_pathway <- ggplot(pathway_data, aes(x = expression_mean, y = reorder(gene, expression_mean))) +
geom_point(aes(color = pathway, size = expression_mean), alpha = 0.8) +
geom_segment(aes(xend = 0, yend = gene, color = pathway), alpha = 0.3) +
facet_wrap(~pathway, scales = "free_y", ncol = 1) +
scale_color_brewer(palette = "Set2", name = "通路 | Pathway") +
scale_size_continuous(name = "平均表达量\n(Mean Expression)") +
theme_minimal() +
theme(
strip.text = element_text(size = 10, face = "bold"),
axis.text.y = element_text(size = 9),
legend.position = "bottom"
) +
labs(
title = "C. 低氧响应通路与基因 | Hypoxia Response Pathways and Genes",
x = "平均表达量 | Mean Expression (Hypoxia)",
y = "基因 | Gene"
)
print(p_pathway)
# 创建基因相互作用网络 | Create gene interaction network
# 基于表达相关性 | Based on expression correlation
# 计算基因之间的相关性 | Calculate correlations between genes
gene_cor <- cor(t(expr_matrix[1:15, ])) # 使用前15个基因 | Use first 15 genes
# 转换为网络数据 | Convert to network data
gene_cor[lower.tri(gene_cor, diag = TRUE)] <- 0
network_edges <- which(abs(gene_cor) > 0.6, arr.ind = TRUE) %>%
as.data.frame() %>%
setNames(c("from", "to")) %>%
mutate(
from = rownames(gene_cor)[from],
to = rownames(gene_cor)[to]
)
# 创建图对象 | Create igraph object
if (nrow(network_edges) > 0) {
g <- graph_from_data_frame(network_edges, directed = FALSE)
p_network <- ggraph(g, layout = "fr") +
geom_edge_link(alpha = 0.3, color = "gray") +
geom_node_point(aes(size = 8), color = "steelblue", alpha = 0.8) +
geom_node_text(aes(label = name), repel = TRUE, size = 4) +
theme_graph() +
labs(title = "D. 基因调控网络 | Gene Regulatory Network")
print(p_network)
}
# 汇总信息 | Summary information
cat("=== 数据统计摘要 | Data Summary ===\n")
## === 数据统计摘要 | Data Summary ===
cat("基因数量 | Number of genes:", nrow(expr_matrix), "\n")
## 基因数量 | Number of genes: 31
cat("样品数量 | Number of samples:", ncol(expr_matrix), "\n")
## 样品数量 | Number of samples: 6
cat("条件类型 | Conditions:", paste(unique(sample_info$condition), collapse = ", "), "\n\n")
## 条件类型 | Conditions: Normoxia, Hypoxia, Hypoxia_High
cat("=== 前10个与低氧评分最相关的基因 | Top 10 Genes Correlated with Hypoxia Score ===\n")
## === 前10个与低氧评分最相关的基因 | Top 10 Genes Correlated with Hypoxia Score ===
print(head(gene_hypoxia_assoc, 10))
## gene correlation p_value log10_pval
## ANKRD37 ANKRD37 0.9889460 0.0001826115 3.738472
## PHD2 PHD2 0.9832064 0.0004206692 3.376059
## PDK3 PDK3 0.9793637 0.0006343938 3.197641
## LDHA LDHA 0.9779953 0.0007209834 3.142075
## HIF3A HIF3A 0.9765273 0.0008199829 3.086195
## KDM4A KDM4A 0.9725112 0.0011230647 2.949595
## VEGFB VEGFB 0.9696857 0.0013645035 2.865025
## PFKFB4 PFKFB4 0.9665635 0.0016583129 2.780334
## CTGF CTGF 0.9643244 0.0018864175 2.724362
## EPO EPO 0.9633960 0.0019852625 2.702182
cat("\n=== 表达变异分析 | Expression Variance Analysis ===\n")
##
## === 表达变异分析 | Expression Variance Analysis ===
cat("平均基因表达 | Mean gene expression:", round(mean(expr_matrix), 2), "\n")
## 平均基因表达 | Mean gene expression: 7.9
cat("基因表达标准差 | SD of gene expression:", round(sd(expr_matrix), 2), "\n")
## 基因表达标准差 | SD of gene expression: 2.7
cat("低氧评分范围 | Hypoxia score range:",
round(min(sample_info$hypoxia_score), 2), "-",
round(max(sample_info$hypoxia_score), 2), "\n")
## 低氧评分范围 | Hypoxia score range: 5.37 - 10.56
sessionInfo()
## R version 4.5.1 (2025-06-13)
## Platform: x86_64-apple-darwin20
## Running under: macOS Sonoma 14.8.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] zh_CN.UTF-8/zh_CN.UTF-8/zh_CN.UTF-8/C/zh_CN.UTF-8/zh_CN.UTF-8
##
## time zone: Asia/Shanghai
## tzcode source: internal
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] ggraph_2.2.2 igraph_2.1.4 circlize_0.4.16
## [4] ComplexHeatmap_2.24.1 gridExtra_2.3 lubridate_1.9.4
## [7] forcats_1.0.1 stringr_1.5.2 dplyr_1.1.4
## [10] purrr_1.1.0 readr_2.1.5 tidyr_1.3.1
## [13] tibble_3.3.0 tidyverse_2.0.0 ggplot2_4.0.0
##
## loaded via a namespace (and not attached):
## [1] gtable_0.3.6 shape_1.4.6.1 rjson_0.2.23
## [4] xfun_0.53 bslib_0.9.0 GlobalOptions_0.1.2
## [7] ggrepel_0.9.6 tzdb_0.5.0 vctrs_0.6.5
## [10] tools_4.5.1 generics_0.1.4 stats4_4.5.1
## [13] parallel_4.5.1 cluster_2.1.8.1 pkgconfig_2.0.3
## [16] RColorBrewer_1.1-3 S7_0.2.0 S4Vectors_0.46.0
## [19] lifecycle_1.0.4 compiler_4.5.1 farver_2.1.2
## [22] ggforce_0.5.0 graphlayouts_1.2.2 codetools_0.2-20
## [25] clue_0.3-66 htmltools_0.5.8.1 sass_0.4.10
## [28] yaml_2.3.10 pillar_1.11.1 crayon_1.5.3
## [31] jquerylib_0.1.4 MASS_7.3-65 cachem_1.1.0
## [34] viridis_0.6.5 iterators_1.0.14 foreach_1.5.2
## [37] tidyselect_1.2.1 digest_0.6.37 stringi_1.8.7
## [40] labeling_0.4.3 polyclip_1.10-7 fastmap_1.2.0
## [43] colorspace_2.1-2 cli_3.6.5 magrittr_2.0.4
## [46] tidygraph_1.3.1 withr_3.0.2 scales_1.4.0
## [49] timechange_0.3.0 rmarkdown_2.30 matrixStats_1.5.0
## [52] png_0.1-8 GetoptLong_1.0.5 hms_1.1.3
## [55] memoise_2.0.1 evaluate_1.0.5 knitr_1.50
## [58] IRanges_2.42.0 doParallel_1.0.17 viridisLite_0.4.2
## [61] rlang_1.1.6 Rcpp_1.1.0 glue_1.8.0
## [64] tweenr_2.0.3 BiocGenerics_0.54.0 jsonlite_2.0.0
## [67] R6_2.6.1